home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Sun Solutions 1997 April to September
/
Sun Solutions CD - APR '97 - SEP '97 (704-3778-12 Rev. H)(Sun Microsystems, Inc.)(1997).iso
/
products
/
.wais
/
wais_SunSolutions
/
UPDATE_INDEX
< prev
next >
Wrap
Text File
|
1997-02-28
|
3KB
|
81 lines
#!/bin/sh
#
# little script to index the file tree. We are using find instead of the
# recursive option to waisindex because the waisindex option doesn't work
# well.
#
# HEY! - this script runs IN the wais data directory
########################################################################
# variables:
#
# INDEX_NAME - the name of the index database. it should match the name
# in the /cgi-bin/newwais.pl file ($src).
# HTTP_SERVER - server hostname
# DOCROOT - root directory that httpd is serving out of.
# DIRECTORIES - list of all the directories to index
# (NOTE(review): never set or referenced in the visible script - confirm)
#
#
# NOTE: just changing the variable is not enough. you must change all the
# pathnames in the file to your installation specifics.
# Active settings.  Every value is a plain literal (nothing is expanded),
# so the quoting is uniform and purely cosmetic.
INDEX_NAME='/tmp/httpd/.wais/wais_SunSolutions/wais_SunSolutions'
HTTP_SERVER='localhost:7999'
DOCROOT='/tmp/httpd/.products'
#INDEX_NAME="/opt/db/wais/catalyst_catalog"
#HTTP_SERVER="pinatubo"
#DOCROOT="$CD_MOUNT/var/opt/WWW/NCSA/htdocs/CCx86-sparc"
#DOCROOT="/opt/db/wais-src"
########################################################################
# get rid of the temporary index file. if a synonym file does not exist
# create a dummy one.
#
# Shouldn't need to worry about this section
#
# Remove leftover temporary file lists matching <index>.*idxable.  The
# variable part is quoted so an index path containing spaces or glob
# characters is neither split nor expanded; only the trailing ".*idxable"
# is handed to the shell as a pattern.
rm -f -- "$INDEX_NAME".*idxable

# Create a placeholder synonym file unless one is already present; an
# existing file is never overwritten.
if [ ! -f "$INDEX_NAME.syn" ]
then
  # One here-document: the file is opened once instead of once per line.
  # The quoted delimiter keeps the text literal.
  cat > "$INDEX_NAME.syn" <<'EOF'
# synonym file. form is:
# word syn0 syn1 ...
# e.g.
# spam pork-shoulder yummy
dummy dummy
EOF
fi
########################################################################
# use find to add the filenames to a temp file. if you add more file
# types (e.g. .gif is a file type)
# you'll probably want to update /cgi-bin/newwais.pl in your
# httpd directory so the search result is pretty
# drop_excluded_paths - filter a newline-separated list of paths on stdin,
# discarding every path that contains one of the excluded names.
# The names are matched as fixed strings (grep -F).  The previous
# "egrep -v .bin" style patterns treated the dot as "any character" and so
# also discarded unrelated paths such as ".../cabinet/index.html".
drop_excluded_paths() {
  grep -F -v -e .wais -e .bin -e .categories -e SunSolutions
}

# One find pass per extension, in the same order as before, so the entries
# appended to the .idxable list keep their original grouping.
for ext in html ps eps txt htm
do
  find "$DOCROOT" -follow -name "*.$ext" -print
done | drop_excluded_paths >> "$INDEX_NAME.idxable"
########################################################################
# index the files using the temp file as input. The URL substitution
# is a feature of freeWAIS .202 and up. it transforms the filename
# into the correct URL so that relative URL's work. The general
# form is -t URL <what to strip off the front> <what to add to the front>
#
# notes:
#
# * use -a on the subsequent index runs to keep appending to the index file
# * -nocontents tells the indexer to only use the filename...the file
# contents are ignored
# Build the index from the file list fed in on stdin.
# The strip prefix for "-t URL" is taken from $DOCROOT, which holds the same
# value as the path that used to be hard-coded here, so the two cannot drift
# apart when the configuration is edited.  This assumes DOCROOT is the
# server's document root, as its description in the header states.
# All expansions are quoted so paths with spaces stay single arguments.
./waisindex -d "$INDEX_NAME" -export -t URL "$DOCROOT" "http://$HTTP_SERVER" -stdin < "$INDEX_NAME.idxable"
#./waisindex -a -nocontents -d $INDEX_NAME -export -t URL $CD_MOUNT/var/opt/WWW/NCSA/htdocs http://$HTTP_SERVER -stdin < $INDEX_NAME.notidxable